{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "3v67nxM7Rgzw"
      },
      "outputs": [],
      "source": [
        "import pandas as pd"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "zz_YuuR1QnGd",
        "outputId": "3bd9e08a-0521-43b7-e457-d0a1c8935887"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Mounted at /content/drive\n"
          ]
        }
      ],
      "source": [
        "# Attach Google Drive at /content/drive (Colab-only step; prints a confirmation when mounted).\n",
        "from google.colab import drive\n",
        "\n",
        "drive.mount(\"/content/drive\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "DDtsohO6RV2_"
      },
      "outputs": [],
      "source": [
        "# CSV location under the /content/drive mount point; consumed by the read_csv cell.\n",
        "path = \"/content/drive/MyDrive/data/MCIOpenData/Major_Crime_Indicators_Open_Data.csv\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "LFqO_4IcRY9F"
      },
      "outputs": [],
      "source": [
        "# Load the CSV at `path` into a DataFrame (read_csv splits on commas by default).\n",
        "df = pd.read_csv(path)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 394
        },
        "id": "Y59yOGoCRnKJ",
        "outputId": "ac15ef8b-1907-4883-a6dc-1e841cf73859"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "df"
            },
            "text/html": [
              "\n",
              "  <div id=\"df-2c1ac229-3703-4e71-a14f-32a3ff342940\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>X</th>\n",
              "      <th>Y</th>\n",
              "      <th>OBJECTID</th>\n",
              "      <th>EVENT_UNIQUE_ID</th>\n",
              "      <th>REPORT_DATE</th>\n",
              "      <th>OCC_DATE</th>\n",
              "      <th>REPORT_YEAR</th>\n",
              "      <th>REPORT_MONTH</th>\n",
              "      <th>REPORT_DAY</th>\n",
              "      <th>REPORT_DOY</th>\n",
              "      <th>...</th>\n",
              "      <th>UCR_CODE</th>\n",
              "      <th>UCR_EXT</th>\n",
              "      <th>OFFENCE</th>\n",
              "      <th>MCI_CATEGORY</th>\n",
              "      <th>HOOD_158</th>\n",
              "      <th>NEIGHBOURHOOD_158</th>\n",
              "      <th>HOOD_140</th>\n",
              "      <th>NEIGHBOURHOOD_140</th>\n",
              "      <th>LONG_WGS84</th>\n",
              "      <th>LAT_WGS84</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>-8.841785e+06</td>\n",
              "      <td>5.410286e+06</td>\n",
              "      <td>1</td>\n",
              "      <td>GO-20141260127</td>\n",
              "      <td>2014/01/01 05:00:00+00</td>\n",
              "      <td>2014/01/01 05:00:00+00</td>\n",
              "      <td>2014</td>\n",
              "      <td>January</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>1420</td>\n",
              "      <td>110</td>\n",
              "      <td>Assault Bodily Harm</td>\n",
              "      <td>Assault</td>\n",
              "      <td>84</td>\n",
              "      <td>Little Portugal</td>\n",
              "      <td>84</td>\n",
              "      <td>Little Portugal (84)</td>\n",
              "      <td>-79.427105</td>\n",
              "      <td>43.642517</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>-8.838126e+06</td>\n",
              "      <td>5.412178e+06</td>\n",
              "      <td>2</td>\n",
              "      <td>GO-20141263725</td>\n",
              "      <td>2014/01/01 05:00:00+00</td>\n",
              "      <td>2014/01/01 05:00:00+00</td>\n",
              "      <td>2014</td>\n",
              "      <td>January</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>1420</td>\n",
              "      <td>100</td>\n",
              "      <td>Assault With Weapon</td>\n",
              "      <td>Assault</td>\n",
              "      <td>78</td>\n",
              "      <td>Kensington-Chinatown</td>\n",
              "      <td>78</td>\n",
              "      <td>Kensington-Chinatown (78)</td>\n",
              "      <td>-79.394234</td>\n",
              "      <td>43.654811</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>-8.841053e+06</td>\n",
              "      <td>5.409639e+06</td>\n",
              "      <td>3</td>\n",
              "      <td>GO-20141264272</td>\n",
              "      <td>2014/01/01 05:00:00+00</td>\n",
              "      <td>2014/01/01 05:00:00+00</td>\n",
              "      <td>2014</td>\n",
              "      <td>January</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>1420</td>\n",
              "      <td>100</td>\n",
              "      <td>Assault With Weapon</td>\n",
              "      <td>Assault</td>\n",
              "      <td>85</td>\n",
              "      <td>South Parkdale</td>\n",
              "      <td>85</td>\n",
              "      <td>South Parkdale (85)</td>\n",
              "      <td>-79.420532</td>\n",
              "      <td>43.638306</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>-8.850461e+06</td>\n",
              "      <td>5.420267e+06</td>\n",
              "      <td>4</td>\n",
              "      <td>GO-20141263706</td>\n",
              "      <td>2014/01/01 05:00:00+00</td>\n",
              "      <td>2014/01/01 05:00:00+00</td>\n",
              "      <td>2014</td>\n",
              "      <td>January</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>1430</td>\n",
              "      <td>100</td>\n",
              "      <td>Assault</td>\n",
              "      <td>Assault</td>\n",
              "      <td>113</td>\n",
              "      <td>Weston</td>\n",
              "      <td>113</td>\n",
              "      <td>Weston (113)</td>\n",
              "      <td>-79.505043</td>\n",
              "      <td>43.707365</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>-8.840470e+06</td>\n",
              "      <td>5.431266e+06</td>\n",
              "      <td>5</td>\n",
              "      <td>GO-20141263710</td>\n",
              "      <td>2014/01/01 05:00:00+00</td>\n",
              "      <td>2014/01/01 05:00:00+00</td>\n",
              "      <td>2014</td>\n",
              "      <td>January</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>...</td>\n",
              "      <td>1430</td>\n",
              "      <td>100</td>\n",
              "      <td>Assault</td>\n",
              "      <td>Assault</td>\n",
              "      <td>151</td>\n",
              "      <td>Yonge-Doris</td>\n",
              "      <td>51</td>\n",
              "      <td>Willowdale East (51)</td>\n",
              "      <td>-79.415293</td>\n",
              "      <td>43.778743</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>5 rows × 31 columns</p>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2c1ac229-3703-4e71-a14f-32a3ff342940')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-2c1ac229-3703-4e71-a14f-32a3ff342940 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-2c1ac229-3703-4e71-a14f-32a3ff342940');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-f512a2fb-f29f-4e6d-80b7-d8058e59ed7c\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-f512a2fb-f29f-4e6d-80b7-d8058e59ed7c')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-f512a2fb-f29f-4e6d-80b7-d8058e59ed7c button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "              X             Y  OBJECTID EVENT_UNIQUE_ID  \\\n",
              "0 -8.841785e+06  5.410286e+06         1  GO-20141260127   \n",
              "1 -8.838126e+06  5.412178e+06         2  GO-20141263725   \n",
              "2 -8.841053e+06  5.409639e+06         3  GO-20141264272   \n",
              "3 -8.850461e+06  5.420267e+06         4  GO-20141263706   \n",
              "4 -8.840470e+06  5.431266e+06         5  GO-20141263710   \n",
              "\n",
              "              REPORT_DATE                OCC_DATE  REPORT_YEAR REPORT_MONTH  \\\n",
              "0  2014/01/01 05:00:00+00  2014/01/01 05:00:00+00         2014      January   \n",
              "1  2014/01/01 05:00:00+00  2014/01/01 05:00:00+00         2014      January   \n",
              "2  2014/01/01 05:00:00+00  2014/01/01 05:00:00+00         2014      January   \n",
              "3  2014/01/01 05:00:00+00  2014/01/01 05:00:00+00         2014      January   \n",
              "4  2014/01/01 05:00:00+00  2014/01/01 05:00:00+00         2014      January   \n",
              "\n",
              "   REPORT_DAY  REPORT_DOY  ... UCR_CODE  UCR_EXT              OFFENCE  \\\n",
              "0           1           1  ...     1420      110  Assault Bodily Harm   \n",
              "1           1           1  ...     1420      100  Assault With Weapon   \n",
              "2           1           1  ...     1420      100  Assault With Weapon   \n",
              "3           1           1  ...     1430      100              Assault   \n",
              "4           1           1  ...     1430      100              Assault   \n",
              "\n",
              "  MCI_CATEGORY  HOOD_158     NEIGHBOURHOOD_158 HOOD_140  \\\n",
              "0      Assault        84       Little Portugal       84   \n",
              "1      Assault        78  Kensington-Chinatown       78   \n",
              "2      Assault        85        South Parkdale       85   \n",
              "3      Assault       113                Weston      113   \n",
              "4      Assault       151           Yonge-Doris       51   \n",
              "\n",
              "           NEIGHBOURHOOD_140 LONG_WGS84  LAT_WGS84  \n",
              "0       Little Portugal (84) -79.427105  43.642517  \n",
              "1  Kensington-Chinatown (78) -79.394234  43.654811  \n",
              "2        South Parkdale (85) -79.420532  43.638306  \n",
              "3               Weston (113) -79.505043  43.707365  \n",
              "4       Willowdale East (51) -79.415293  43.778743  \n",
              "\n",
              "[5 rows x 31 columns]"
            ]
          },
          "execution_count": 5,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Preview the first rows of the loaded frame as a quick sanity check.\n",
        "df.head()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "EV063BbdRsz6",
        "outputId": "52764caa-3694-47ca-9c91-7e5f14c531de"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "Index(['X', 'Y', 'OBJECTID', 'EVENT_UNIQUE_ID', 'REPORT_DATE', 'OCC_DATE',\n",
              "       'REPORT_YEAR', 'REPORT_MONTH', 'REPORT_DAY', 'REPORT_DOY', 'REPORT_DOW',\n",
              "       'REPORT_HOUR', 'OCC_YEAR', 'OCC_MONTH', 'OCC_DAY', 'OCC_DOY', 'OCC_DOW',\n",
              "       'OCC_HOUR', 'DIVISION', 'LOCATION_TYPE', 'PREMISES_TYPE', 'UCR_CODE',\n",
              "       'UCR_EXT', 'OFFENCE', 'MCI_CATEGORY', 'HOOD_158', 'NEIGHBOURHOOD_158',\n",
              "       'HOOD_140', 'NEIGHBOURHOOD_140', 'LONG_WGS84', 'LAT_WGS84'],\n",
              "      dtype='object')"
            ]
          },
          "execution_count": 6,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Show every column label; the head() preview elides the middle columns.\n",
        "df.columns"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 320
        },
        "id": "_-0ytz1GTIh2",
        "outputId": "d3a41e7f-ccb7-435f-9ef7-17ff8e8f6b4f"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "summary": "{\n  \"name\": \"df\",\n  \"rows\": 8,\n  \"fields\": [\n    {\n      \"column\": \"X\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 4825418.128207227,\n        \"min\": -8865400.4618276,\n        \"max\": 1082235.9601684767,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          -8704010.891908009,\n          -8838113.58389807,\n          384687.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Y\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 2624212.5747140665,\n        \"min\": 5.664924e-09,\n        \"max\": 5442747.00467992,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          5337674.081021296,\n          5419037.18846965,\n          384687.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"OBJECTID\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 138642.751213066,\n        \"min\": 1.0,\n        \"max\": 384687.0,\n        \"num_unique_values\": 6,\n        \"samples\": [\n          384687.0,\n          192344.0,\n          288515.5\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"REPORT_YEAR\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 135397.23227578247,\n        \"min\": 2.995789994504012,\n        \"max\": 384687.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          2018.96082529433,\n          2019.0,\n          384687.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"REPORT_DAY\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 136002.16500454693,\n        \"min\": 1.0,\n        \"max\": 384687.0,\n        \"num_unique_values\": 8,\n        
\"samples\": [\n          15.746716161450738,\n          16.0,\n          384687.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"REPORT_DOY\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 135946.67958190202,\n        \"min\": 1.0,\n        \"max\": 384687.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          182.85289079173458,\n          184.0,\n          384687.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"REPORT_HOUR\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 136003.29360844236,\n        \"min\": 0.0,\n        \"max\": 384687.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          12.70584137233647,\n          13.0,\n          384687.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"OCC_YEAR\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 135355.1571587999,\n        \"min\": 3.0300552892528714,\n        \"max\": 384566.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          2018.9000431655425,\n          2019.0,\n          384566.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"OCC_DAY\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 135959.4426708612,\n        \"min\": 1.0,\n        \"max\": 384566.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          15.445226567091215,\n          15.0,\n          384566.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"OCC_DOY\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 135903.95261231455,\n        \"min\": 1.0,\n        \"max\": 
384566.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          182.48536012023945,\n          183.0,\n          384566.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"OCC_HOUR\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 136003.2602419678,\n        \"min\": 0.0,\n        \"max\": 384687.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          12.55359032148214,\n          14.0,\n          384687.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"UCR_CODE\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 135473.38157747983,\n        \"min\": 329.5207723233853,\n        \"max\": 384687.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          1711.498784726284,\n          1460.0,\n          384687.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"UCR_EXT\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 135960.44370198547,\n        \"min\": 52.27988246936702,\n        \"max\": 384687.0,\n        \"num_unique_values\": 6,\n        \"samples\": [\n          384687.0,\n          147.52061286188513,\n          230.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"LONG_WGS84\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 136026.90940862766,\n        \"min\": -79.6392473469942,\n        \"max\": 384687.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          -78.18946017343829,\n          -79.3941251522005,\n          384687.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"LAT_WGS84\",\n      \"properties\": {\n        
\"dtype\": \"number\",\n        \"std\": 135996.11323782383,\n        \"min\": 0.0,\n        \"max\": 384687.0,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          43.04146386093065,\n          43.6993760619933,\n          384687.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
              "type": "dataframe"
            },
            "text/html": [
              "\n",
              "  <div id=\"df-e3e00f44-ef13-4c94-9647-d37f013efe13\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>X</th>\n",
              "      <th>Y</th>\n",
              "      <th>OBJECTID</th>\n",
              "      <th>REPORT_YEAR</th>\n",
              "      <th>REPORT_DAY</th>\n",
              "      <th>REPORT_DOY</th>\n",
              "      <th>REPORT_HOUR</th>\n",
              "      <th>OCC_YEAR</th>\n",
              "      <th>OCC_DAY</th>\n",
              "      <th>OCC_DOY</th>\n",
              "      <th>OCC_HOUR</th>\n",
              "      <th>UCR_CODE</th>\n",
              "      <th>UCR_EXT</th>\n",
              "      <th>LONG_WGS84</th>\n",
              "      <th>LAT_WGS84</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>count</th>\n",
              "      <td>3.846870e+05</td>\n",
              "      <td>3.846870e+05</td>\n",
              "      <td>384687.000000</td>\n",
              "      <td>384687.000000</td>\n",
              "      <td>384687.000000</td>\n",
              "      <td>384687.000000</td>\n",
              "      <td>384687.000000</td>\n",
              "      <td>384566.000000</td>\n",
              "      <td>384566.000000</td>\n",
              "      <td>384566.000000</td>\n",
              "      <td>384687.000000</td>\n",
              "      <td>384687.000000</td>\n",
              "      <td>384687.000000</td>\n",
              "      <td>384687.000000</td>\n",
              "      <td>384687.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>mean</th>\n",
              "      <td>-8.704011e+06</td>\n",
              "      <td>5.337674e+06</td>\n",
              "      <td>192344.000000</td>\n",
              "      <td>2018.960825</td>\n",
              "      <td>15.746716</td>\n",
              "      <td>182.852891</td>\n",
              "      <td>12.705841</td>\n",
              "      <td>2018.900043</td>\n",
              "      <td>15.445227</td>\n",
              "      <td>182.485360</td>\n",
              "      <td>12.553590</td>\n",
              "      <td>1711.498785</td>\n",
              "      <td>147.520613</td>\n",
              "      <td>-78.189460</td>\n",
              "      <td>43.041464</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>std</th>\n",
              "      <td>1.082236e+06</td>\n",
              "      <td>6.636856e+05</td>\n",
              "      <td>111049.715839</td>\n",
              "      <td>2.995790</td>\n",
              "      <td>8.771914</td>\n",
              "      <td>105.137876</td>\n",
              "      <td>6.464899</td>\n",
              "      <td>3.030055</td>\n",
              "      <td>8.932416</td>\n",
              "      <td>105.455768</td>\n",
              "      <td>7.278063</td>\n",
              "      <td>329.520772</td>\n",
              "      <td>52.279882</td>\n",
              "      <td>9.721891</td>\n",
              "      <td>5.351631</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>min</th>\n",
              "      <td>-8.865400e+06</td>\n",
              "      <td>5.664924e-09</td>\n",
              "      <td>1.000000</td>\n",
              "      <td>2014.000000</td>\n",
              "      <td>1.000000</td>\n",
              "      <td>1.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>2000.000000</td>\n",
              "      <td>1.000000</td>\n",
              "      <td>1.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>1410.000000</td>\n",
              "      <td>100.000000</td>\n",
              "      <td>-79.639247</td>\n",
              "      <td>0.000000</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25%</th>\n",
              "      <td>-8.847013e+06</td>\n",
              "      <td>5.412942e+06</td>\n",
              "      <td>96172.500000</td>\n",
              "      <td>2016.000000</td>\n",
              "      <td>8.000000</td>\n",
              "      <td>90.000000</td>\n",
              "      <td>8.000000</td>\n",
              "      <td>2016.000000</td>\n",
              "      <td>8.000000</td>\n",
              "      <td>90.000000</td>\n",
              "      <td>6.000000</td>\n",
              "      <td>1430.000000</td>\n",
              "      <td>100.000000</td>\n",
              "      <td>-79.474066</td>\n",
              "      <td>43.659776</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>50%</th>\n",
              "      <td>-8.838114e+06</td>\n",
              "      <td>5.419037e+06</td>\n",
              "      <td>192344.000000</td>\n",
              "      <td>2019.000000</td>\n",
              "      <td>16.000000</td>\n",
              "      <td>184.000000</td>\n",
              "      <td>13.000000</td>\n",
              "      <td>2019.000000</td>\n",
              "      <td>15.000000</td>\n",
              "      <td>183.000000</td>\n",
              "      <td>14.000000</td>\n",
              "      <td>1460.000000</td>\n",
              "      <td>100.000000</td>\n",
              "      <td>-79.394125</td>\n",
              "      <td>43.699376</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>75%</th>\n",
              "      <td>-8.830017e+06</td>\n",
              "      <td>5.426963e+06</td>\n",
              "      <td>288515.500000</td>\n",
              "      <td>2022.000000</td>\n",
              "      <td>23.000000</td>\n",
              "      <td>274.000000</td>\n",
              "      <td>18.000000</td>\n",
              "      <td>2022.000000</td>\n",
              "      <td>23.000000</td>\n",
              "      <td>274.000000</td>\n",
              "      <td>19.000000</td>\n",
              "      <td>2120.000000</td>\n",
              "      <td>200.000000</td>\n",
              "      <td>-79.321396</td>\n",
              "      <td>43.750830</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>max</th>\n",
              "      <td>6.327780e-09</td>\n",
              "      <td>5.442747e+06</td>\n",
              "      <td>384687.000000</td>\n",
              "      <td>2024.000000</td>\n",
              "      <td>31.000000</td>\n",
              "      <td>366.000000</td>\n",
              "      <td>23.000000</td>\n",
              "      <td>2024.000000</td>\n",
              "      <td>31.000000</td>\n",
              "      <td>366.000000</td>\n",
              "      <td>23.000000</td>\n",
              "      <td>2135.000000</td>\n",
              "      <td>230.000000</td>\n",
              "      <td>0.000000</td>\n",
              "      <td>43.853164</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e3e00f44-ef13-4c94-9647-d37f013efe13')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-e3e00f44-ef13-4c94-9647-d37f013efe13 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-e3e00f44-ef13-4c94-9647-d37f013efe13');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-0f9b56b7-9c3a-4cd2-b37a-9c06a4782eb7\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-0f9b56b7-9c3a-4cd2-b37a-9c06a4782eb7')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-0f9b56b7-9c3a-4cd2-b37a-9c06a4782eb7 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "                  X             Y       OBJECTID    REPORT_YEAR  \\\n",
              "count  3.846870e+05  3.846870e+05  384687.000000  384687.000000   \n",
              "mean  -8.704011e+06  5.337674e+06  192344.000000    2018.960825   \n",
              "std    1.082236e+06  6.636856e+05  111049.715839       2.995790   \n",
              "min   -8.865400e+06  5.664924e-09       1.000000    2014.000000   \n",
              "25%   -8.847013e+06  5.412942e+06   96172.500000    2016.000000   \n",
              "50%   -8.838114e+06  5.419037e+06  192344.000000    2019.000000   \n",
              "75%   -8.830017e+06  5.426963e+06  288515.500000    2022.000000   \n",
              "max    6.327780e-09  5.442747e+06  384687.000000    2024.000000   \n",
              "\n",
              "          REPORT_DAY     REPORT_DOY    REPORT_HOUR       OCC_YEAR  \\\n",
              "count  384687.000000  384687.000000  384687.000000  384566.000000   \n",
              "mean       15.746716     182.852891      12.705841    2018.900043   \n",
              "std         8.771914     105.137876       6.464899       3.030055   \n",
              "min         1.000000       1.000000       0.000000    2000.000000   \n",
              "25%         8.000000      90.000000       8.000000    2016.000000   \n",
              "50%        16.000000     184.000000      13.000000    2019.000000   \n",
              "75%        23.000000     274.000000      18.000000    2022.000000   \n",
              "max        31.000000     366.000000      23.000000    2024.000000   \n",
              "\n",
              "             OCC_DAY        OCC_DOY       OCC_HOUR       UCR_CODE  \\\n",
              "count  384566.000000  384566.000000  384687.000000  384687.000000   \n",
              "mean       15.445227     182.485360      12.553590    1711.498785   \n",
              "std         8.932416     105.455768       7.278063     329.520772   \n",
              "min         1.000000       1.000000       0.000000    1410.000000   \n",
              "25%         8.000000      90.000000       6.000000    1430.000000   \n",
              "50%        15.000000     183.000000      14.000000    1460.000000   \n",
              "75%        23.000000     274.000000      19.000000    2120.000000   \n",
              "max        31.000000     366.000000      23.000000    2135.000000   \n",
              "\n",
              "             UCR_EXT     LONG_WGS84      LAT_WGS84  \n",
              "count  384687.000000  384687.000000  384687.000000  \n",
              "mean      147.520613     -78.189460      43.041464  \n",
              "std        52.279882       9.721891       5.351631  \n",
              "min       100.000000     -79.639247       0.000000  \n",
              "25%       100.000000     -79.474066      43.659776  \n",
              "50%       100.000000     -79.394125      43.699376  \n",
              "75%       200.000000     -79.321396      43.750830  \n",
              "max       230.000000       0.000000      43.853164  "
            ]
          },
          "execution_count": 7,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "df.describe()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "JWMNwp5SvVcR",
        "outputId": "77168e04-6820-46ab-a69b-b5b7412da593"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "X                      0\n",
              "Y                      0\n",
              "OBJECTID               0\n",
              "EVENT_UNIQUE_ID        0\n",
              "REPORT_DATE            0\n",
              "OCC_DATE               0\n",
              "REPORT_YEAR            0\n",
              "REPORT_MONTH           0\n",
              "REPORT_DAY             0\n",
              "REPORT_DOY             0\n",
              "REPORT_DOW             0\n",
              "REPORT_HOUR            0\n",
              "OCC_YEAR             121\n",
              "OCC_MONTH            121\n",
              "OCC_DAY              121\n",
              "OCC_DOY              121\n",
              "OCC_DOW              121\n",
              "OCC_HOUR               0\n",
              "DIVISION               0\n",
              "LOCATION_TYPE          0\n",
              "PREMISES_TYPE          0\n",
              "UCR_CODE               0\n",
              "UCR_EXT                0\n",
              "OFFENCE                0\n",
              "MCI_CATEGORY           0\n",
              "HOOD_158               0\n",
              "NEIGHBOURHOOD_158      0\n",
              "HOOD_140               0\n",
              "NEIGHBOURHOOD_140      0\n",
              "LONG_WGS84             0\n",
              "LAT_WGS84              0\n",
              "dtype: int64"
            ]
          },
          "execution_count": 8,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "df.isna().sum()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "KBCoVLr6vqII",
        "outputId": "3527c221-d06d-439c-ef4e-2dee725f00f2"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "X                    float64\n",
              "Y                    float64\n",
              "OBJECTID               int64\n",
              "EVENT_UNIQUE_ID       object\n",
              "REPORT_DATE           object\n",
              "OCC_DATE              object\n",
              "REPORT_YEAR            int64\n",
              "REPORT_MONTH          object\n",
              "REPORT_DAY             int64\n",
              "REPORT_DOY             int64\n",
              "REPORT_DOW            object\n",
              "REPORT_HOUR            int64\n",
              "OCC_YEAR             float64\n",
              "OCC_MONTH             object\n",
              "OCC_DAY              float64\n",
              "OCC_DOY              float64\n",
              "OCC_DOW               object\n",
              "OCC_HOUR               int64\n",
              "DIVISION              object\n",
              "LOCATION_TYPE         object\n",
              "PREMISES_TYPE         object\n",
              "UCR_CODE               int64\n",
              "UCR_EXT                int64\n",
              "OFFENCE               object\n",
              "MCI_CATEGORY          object\n",
              "HOOD_158              object\n",
              "NEIGHBOURHOOD_158     object\n",
              "HOOD_140              object\n",
              "NEIGHBOURHOOD_140     object\n",
              "LONG_WGS84           float64\n",
              "LAT_WGS84            float64\n",
              "dtype: object"
            ]
          },
          "execution_count": 9,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "df.dtypes"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "fcabZfaMwAIq"
      },
      "outputs": [],
      "source": [
        "df.dropna(subset=['OCC_YEAR', 'OCC_DAY', 'OCC_DOY'], inplace=True)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Cvc9ciPJSnrX"
      },
      "source": [
        "In this project, i will chose features might be most relevant for predicting crime:\n",
        "\n",
        "**Temporal Features:** OCC_YEAR, OCC_MONTH, OCC_DAY, OCC_DOY, OCC_DOW, OCC_HOUR\n",
        "\n",
        "**Spatial Features:** LONG_WGS84, LAT_WGS84, HOOD_158, NEIGHBOURHOOD_158, HOOD_140, NEIGHBOURHOOD_140\n",
        "\n",
        "**Crime Details:** DIVISION, LOCATION_TYPE, PREMISES_TYPE, MCI_CATEGORY"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "NONvADl1SmRn"
      },
      "outputs": [],
      "source": [
        "selected_columns = [\n",
        "    'OCC_YEAR', 'OCC_MONTH', 'OCC_DAY', 'OCC_DOY', 'OCC_DOW', 'OCC_HOUR',\n",
        "    'LONG_WGS84', 'LAT_WGS84', 'HOOD_158', 'NEIGHBOURHOOD_158',\n",
        "    'HOOD_140', 'NEIGHBOURHOOD_140', 'DIVISION', 'LOCATION_TYPE',\n",
        "    'PREMISES_TYPE', 'MCI_CATEGORY'\n",
        "]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "k21eKjV3TCp-"
      },
      "outputs": [],
      "source": [
        "df_selected = df[selected_columns]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "SFqqtmy3TEh2",
        "outputId": "47a3842d-4c3a-47db-d9a1-7c0f8ae97ad8"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "   OCC_YEAR  OCC_DAY  OCC_DOY  OCC_HOUR  LONG_WGS84  LAT_WGS84 MCI_CATEGORY  \\\n",
            "0    2014.0      1.0      1.0         1  -79.427105  43.642517      Assault   \n",
            "1    2014.0      1.0      1.0        18  -79.394234  43.654811      Assault   \n",
            "2    2014.0      1.0      1.0        21  -79.420532  43.638306      Assault   \n",
            "3    2014.0      1.0      1.0        18  -79.505043  43.707365      Assault   \n",
            "4    2014.0      1.0      1.0        18  -79.415293  43.778743      Assault   \n",
            "\n",
            "   OCC_MONTH_April  OCC_MONTH_August  OCC_MONTH_December  ...  \\\n",
            "0            False             False               False  ...   \n",
            "1            False             False               False  ...   \n",
            "2            False             False               False  ...   \n",
            "3            False             False               False  ...   \n",
            "4            False             False               False  ...   \n",
            "\n",
            "   LOCATION_TYPE_Ttc Wheel Trans Vehicle  \\\n",
            "0                                  False   \n",
            "1                                  False   \n",
            "2                                  False   \n",
            "3                                  False   \n",
            "4                                  False   \n",
            "\n",
            "   LOCATION_TYPE_Universities / Colleges  LOCATION_TYPE_Unknown  \\\n",
            "0                                  False                  False   \n",
            "1                                  False                  False   \n",
            "2                                  False                  False   \n",
            "3                                  False                  False   \n",
            "4                                  False                  False   \n",
            "\n",
            "   PREMISES_TYPE_Apartment  PREMISES_TYPE_Commercial  \\\n",
            "0                    False                      True   \n",
            "1                     True                     False   \n",
            "2                     True                     False   \n",
            "3                    False                     False   \n",
            "4                     True                     False   \n",
            "\n",
            "   PREMISES_TYPE_Educational  PREMISES_TYPE_House  PREMISES_TYPE_Other  \\\n",
            "0                      False                False                False   \n",
            "1                      False                False                False   \n",
            "2                      False                False                False   \n",
            "3                      False                 True                False   \n",
            "4                      False                False                False   \n",
            "\n",
            "   PREMISES_TYPE_Outside  PREMISES_TYPE_Transit  \n",
            "0                  False                  False  \n",
            "1                  False                  False  \n",
            "2                  False                  False  \n",
            "3                  False                  False  \n",
            "4                  False                  False  \n",
            "\n",
            "[5 rows x 705 columns]\n"
          ]
        }
      ],
      "source": [
        "df_encoded = pd.get_dummies(df_selected, columns=['OCC_MONTH','OCC_DOW','HOOD_158','NEIGHBOURHOOD_158','HOOD_140','NEIGHBOURHOOD_140','DIVISION', 'LOCATION_TYPE', 'PREMISES_TYPE'])\n",
        "\n",
        "print(df_encoded.head())"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "aC8sAFmSXsPa"
      },
      "outputs": [],
      "source": [
        "y = df_encoded['MCI_CATEGORY']\n",
        "X = df_encoded.drop(columns=['MCI_CATEGORY'])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ljrV0L8-Yv6-"
      },
      "outputs": [],
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "P26C9qjyxjg_"
      },
      "source": [
        "## Random forest"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "DXGRsYkxY0bA"
      },
      "outputs": [],
      "source": [
        "# Train a random forest classifier\n",
        "from sklearn.ensemble import RandomForestClassifier\n",
        "clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
        "clf.fit(X_train, y_train)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "lVwHsoy_aszn",
        "outputId": "b2333d64-5df2-4e23-d19c-f2bccd8938d5"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Accuracy: 0.6907081563664731\n",
            "Precision: 0.6732061539751286\n",
            "Recall: 0.6907081563664731\n",
            "F1 Score: 0.6698829205656046\n",
            "ROC AUC Score: 0.8367142549448829\n"
          ]
        }
      ],
      "source": [
        "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score\n",
        "\n",
        "#Evaluating the Random Forest model\n",
        "y_pred = clf.predict(X_test)\n",
        "\n",
        "accuracy = accuracy_score(y_test, y_pred)\n",
        "precision = precision_score(y_test, y_pred, average='weighted')\n",
        "recall = recall_score(y_test, y_pred, average='weighted')\n",
        "f1 = f1_score(y_test, y_pred, average='weighted')\n",
        "roc_auc = roc_auc_score(y_test, clf.predict_proba(X_test), multi_class='ovr')\n",
        "\n",
        "print(f\"Accuracy: {accuracy}\")\n",
        "print(f\"Precision: {precision}\")\n",
        "print(f\"Recall: {recall}\")\n",
        "print(f\"F1 Score: {f1}\")\n",
        "print(f\"ROC AUC Score: {roc_auc}\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "SGaypivbxoCA"
      },
      "source": [
        "## Gradient Boosting"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ipDo6y-mxr2g"
      },
      "outputs": [],
      "source": [
        "# Fit a gradient boosting classifier with 100 boosting stages (seed fixed for reproducibility).\n",
        "# Only training happens here: the evaluation cell that follows computes y_pred itself,\n",
        "# so predicting in this cell as well would just repeat the same work.\n",
        "from sklearn.ensemble import GradientBoostingClassifier\n",
        "clf = GradientBoostingClassifier(n_estimators=100, random_state=42)\n",
        "clf.fit(X_train, y_train)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3Yb5FQ1dxtHr",
        "outputId": "951226f1-0ebe-4737-90c7-3623d7c72f9d"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Accuracy: 0.6371413712403571\n",
            "Precision: 0.6265935780399932\n",
            "Recall: 0.6371413712403571\n",
            "F1 Score: 0.5923585330432165\n",
            "ROC AUC Score: 0.8035542895307752\n"
          ]
        }
      ],
      "source": [
        "from sklearn.metrics import (\n",
        "    accuracy_score,\n",
        "    precision_score,\n",
        "    recall_score,\n",
        "    f1_score,\n",
        "    roc_auc_score,\n",
        ")\n",
        "\n",
        "# Score the classifier currently bound to `clf` on X_test / y_test.\n",
        "y_pred = clf.predict(X_test)\n",
        "\n",
        "accuracy = accuracy_score(y_test, y_pred)\n",
        "# Precision, recall and F1 all use the same 'weighted' averaging over classes.\n",
        "precision, recall, f1 = (\n",
        "    metric(y_test, y_pred, average='weighted')\n",
        "    for metric in (precision_score, recall_score, f1_score)\n",
        ")\n",
        "# One-vs-rest ROC AUC, computed from the predicted class probabilities.\n",
        "roc_auc = roc_auc_score(y_test, clf.predict_proba(X_test), multi_class='ovr')\n",
        "\n",
        "print(\n",
        "    f\"Accuracy: {accuracy}\",\n",
        "    f\"Precision: {precision}\",\n",
        "    f\"Recall: {recall}\",\n",
        "    f\"F1 Score: {f1}\",\n",
        "    f\"ROC AUC Score: {roc_auc}\",\n",
        "    sep=\"\\n\",\n",
        ")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "eezXHvgpzfS6"
      },
      "source": [
        "## K-Nearest Neighbors (KNN)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "PcFj1OfyzgxK"
      },
      "outputs": [],
      "source": [
        "# Fit a k-nearest-neighbours classifier using the 5 nearest training points.\n",
        "# Only training happens here: the evaluation cell that follows computes y_pred itself,\n",
        "# and k-NN prediction is the costly step, so it should not be run twice.\n",
        "from sklearn.neighbors import KNeighborsClassifier\n",
        "clf = KNeighborsClassifier(n_neighbors=5)\n",
        "clf.fit(X_train, y_train)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "background_save": true
        },
        "id": "8OMWpJQOzlr5",
        "outputId": "6132a5f7-53f5-4944-b4f2-190fa57d0bd4"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Accuracy: 0.5702608997139638\n",
            "Precision: 0.5275454711932136\n",
            "Recall: 0.5702608997139638\n",
            "F1 Score: 0.5318794904624676\n",
            "ROC AUC Score: 0.680298955136482\n"
          ]
        }
      ],
      "source": [
        "from sklearn.metrics import (\n",
        "    accuracy_score,\n",
        "    precision_score,\n",
        "    recall_score,\n",
        "    f1_score,\n",
        "    roc_auc_score,\n",
        ")\n",
        "\n",
        "# Score the classifier currently bound to `clf` on X_test / y_test.\n",
        "y_pred = clf.predict(X_test)\n",
        "\n",
        "accuracy = accuracy_score(y_test, y_pred)\n",
        "# Precision, recall and F1 all use the same 'weighted' averaging over classes.\n",
        "precision, recall, f1 = (\n",
        "    metric(y_test, y_pred, average='weighted')\n",
        "    for metric in (precision_score, recall_score, f1_score)\n",
        ")\n",
        "# One-vs-rest ROC AUC, computed from the predicted class probabilities.\n",
        "roc_auc = roc_auc_score(y_test, clf.predict_proba(X_test), multi_class='ovr')\n",
        "\n",
        "print(\n",
        "    f\"Accuracy: {accuracy}\",\n",
        "    f\"Precision: {precision}\",\n",
        "    f\"Recall: {recall}\",\n",
        "    f\"F1 Score: {f1}\",\n",
        "    f\"ROC AUC Score: {roc_auc}\",\n",
        "    sep=\"\\n\",\n",
        ")\n"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}